On cherche à étudier l’effet de trois facteurs sur le transcriptome des racines d’Arabidopsis thaliana et de la micro Tomate.
load(paste0("./GenesCO2_", specie, ".RData"))
load("./normalized.count_At.RData")
# quantification file
data <- read.csv("quantifFiles/QuantifGenes.csv", h = T, sep = ",")
rownames(data) <- data$Gene
genes = which(!(grepl("__", rownames(data))))
not_quant = data[which((grepl("__", rownames(data)))), ]
data = data[genes, grepl("R", colnames(data))]
keep <- rowSums(data) >= 10
data <- data[keep, ]
group <- sapply(colnames(data), getLabel, with.rep = F)
colnames(data) <- sapply(colnames(data), getLabel)
specie = "At"
clusteredGenes <- clustering(sharedBy3, data)****************************************
coseq analysis: Poisson approach & none transformation
K = 2 to 12
Use set.seed() prior to running coseq for reproducible results.
****************************************
Running g = 2 ...
[1] "Initialization: 1"
[1] "Log-like diff: 0"
Running g = 3 ...
[1] "Initialization: 1"
[1] "Log-like diff: 4.90653064844082e-08"
Running g = 4 ...
[1] "Initialization: 1"
[1] "Log-like diff: 9.85664883046411e-11"
Running g = 5 ...
[1] "Initialization: 1"
[1] "Log-like diff: 1.33795765577815e-08"
Running g = 6 ...
[1] "Initialization: 1"
[1] "Log-like diff: 0"
Running g = 7 ...
[1] "Initialization: 1"
[1] "Log-like diff: 5.30698684997333e-09"
Running g = 8 ...
[1] "Initialization: 1"
[1] "Log-like diff: 2.8421709430404e-14"
Running g = 9 ...
[1] "Initialization: 1"
[1] "Log-like diff: 5.6843418860808e-14"
Running g = 10 ...
[1] "Initialization: 1"
[1] "Log-like diff: 0"
Running g = 11 ...
[1] "Initialization: 1"
[1] "Log-like diff: 0"
Running g = 12 ...
[1] "Initialization: 1"
[1] "Log-like diff: 1.79953033807578e-06"
$ICL
$profiles
$boxplots
$probapost_barplots
*************************************************
Model: Poisson
Transformation: none
*************************************************
Clusters fit: 2,3,4,5,6,7,8,9,10,11,12
Clusters with errors: ---
Selected number of clusters via ICL: 12
ICL of selected model: -740101.6
*************************************************
Number of clusters = 12
ICL = -740101.6
*************************************************
Cluster sizes:
Cluster 1 Cluster 2 Cluster 3 Cluster 4 Cluster 5 Cluster 6 Cluster 7
4 22 25 5 2 3 8
Cluster 8 Cluster 9 Cluster 10 Cluster 11 Cluster 12
12 7 17 12 14
Number of observations with MAP > 0.90 (% of total):
130 (99.24%)
Number of observations with MAP > 0.90 per cluster (% of total per cluster):
Cluster 1 Cluster 2 Cluster 3 Cluster 4 Cluster 5 Cluster 6 Cluster 7
4 22 25 5 2 3 8
(100%) (100%) (100%) (100%) (100%) (100%) (100%)
Cluster 8 Cluster 9 Cluster 10 Cluster 11 Cluster 12
12 7 17 11 14
(100%) (100%) (100%) (91.67%) (100%)
a$cluster <- clusteredGenes[a$ensembl_gene_id]
entrezID <- list()
nb_clust = max(clusteredGenes)
for (clust in seq(1:nb_clust)) {
# print(entrez[entrez$cluster == clust,]$ensembl_transcript_id)
entrezID[[length(entrezID) + 1]] <- na.omit(a[a$cluster == clust, ]$entrezgene_id)
}
names(entrezID) <- as.character(seq(1:nb_clust))
ck <- compareCluster(geneCluster = entrezID, fun = "enrichGO", OrgDb = org.At.tair.db,
ont = "BP", pAdjustMethod = "BH", pvalueCutoff = 0.01, qvalueCutoff = 0.05)
clusterProfiler::dotplot(ck, x = ~Cluster)# On essaie un autre clustering avec lka librarie MPLN mpln (dataset =
# as.matrix(data[sharedBy3,])) beaucoup trop long, même mutlithreadé, c'est n'impModel-Based Clustering Using MPLN (Parallelized) Description Performs clustering using mixtures of multivariate Poisson-log normal (MPLN) distribution and model selection using AIC, AIC3, BIC and ICL. Since each component/cluster size (G) is independent from another, all Gs in the range to be tested have been parallelized to run on a seperate core using the parallel R package.
Class: pca dudi
Call: dudi.pca(df = log(data + 0.1), center = TRUE, scale = TRUE, scannf = FALSE,
nf = 4)
Total inertia: 24
Eigenvalues:
Ax1 Ax2 Ax3 Ax4 Ax5
17.2861 4.7060 0.6320 0.5028 0.2615
Projected inertia (%):
Ax1 Ax2 Ax3 Ax4 Ax5
72.025 19.608 2.633 2.095 1.089
Cumulative projected inertia (%):
Ax1 Ax1:2 Ax1:3 Ax1:4 Ax1:5
72.03 91.63 94.27 96.36 97.45
(Only 5 dimensions (out of 24) are shown)
NULL
log.data <- log2(normalized.count[sharedBy3, ] + 1)
Norm.interest.corr <- corr.test(t(log.data), method = "pearson", ci = F)
Norm.interest.corr$p[lower.tri(Norm.interest.corr$p, diag = -TRUE)] = NA
Pval.adj <- as.data.frame(as.table(Norm.interest.corr$p))
Norm.interest.corr$r[lower.tri(Norm.interest.corr$r, diag = TRUE)] = NA
Correlation <- as.data.frame(as.table(Norm.interest.corr$r))
Cor.table <- na.exclude(cbind(Correlation, Pval.adj))[, c(1, 2, 3, 6)]
colnames(Cor.table) <- c("gene1", "gene2", "cor", "p.adj")
Cor.table.filt <- Cor.table[(abs(Cor.table[, 3]) > 0.9 & Cor.table[, 4] < 0.01),
]
g <- graph.data.frame(Cor.table.filt[, 1:2], directed = -FALSE)
V(g)$color <- clusteredGenes[V(g)]
degree <- degree(g)
hist(degree, breaks = 30)Node_nw_st <- data.frame(degree, betweenness)
plot.igraph(g, vertex.size = 5, vertex.label.cex = 0.3, color = clusteredGenes)library(d3r)
data_json <- d3_igraph(g)
write(data_json, "data.json")
write.table(Cor.table.filt, "GraphCO2.txt", sep = "\t", row.names = F, quote = F)
Rank_stat <- rowMeans(cbind(rank(Node_nw_st[, 1]), rank(Node_nw_st[, 2])))
Node_nw_st <- cbind(Node_nw_st, Rank_stat)
write.table(Node_nw_st, file = "StatsCO2.txt", sep = "\t", col.names = NA, quote = F)****************************************
coseq analysis: Poisson approach & none transformation
K = 2 to 12
Use set.seed() prior to running coseq for reproducible results.
****************************************
Running g = 2 ...
[1] "Initialization: 1"
[1] "Log-like diff: 4.59017712728382e-10"
Running g = 3 ...
[1] "Initialization: 1"
[1] "Log-like diff: 0.0857992577874072"
[1] "Log-like diff: 0.0273270790755795"
[1] "Log-like diff: 0.00026883443707959"
[1] "Log-like diff: 3.19062152343008e-06"
Running g = 4 ...
[1] "Initialization: 1"
[1] "Log-like diff: 1.21945280540103e-07"
Running g = 5 ...
[1] "Initialization: 1"
[1] "Log-like diff: 2.64516266668124e-08"
Running g = 6 ...
[1] "Initialization: 1"
[1] "Log-like diff: 8.91520812729141e-09"
Running g = 7 ...
[1] "Initialization: 1"
[1] "Log-like diff: 3.85815361880759e-08"
Running g = 8 ...
[1] "Initialization: 1"
[1] "Log-like diff: 0.731106217676853"
[1] "Log-like diff: 0.876331706008131"
[1] "Log-like diff: 1.54299612519634"
[1] "Log-like diff: 3.0168944802933"
[1] "Log-like diff: 5.52771074616917"
Running g = 9 ...
[1] "Initialization: 1"
[1] "Log-like diff: 0.000391092955158712"
[1] "Log-like diff: 9.18728272125691e-06"
Running g = 10 ...
[1] "Initialization: 1"
[1] "Log-like diff: 5.83051829039505e-08"
Running g = 11 ...
[1] "Initialization: 1"
[1] "Log-like diff: 2.47112143441086e-05"
[1] "Log-like diff: 1.4252105380308e-06"
Running g = 12 ...
[1] "Initialization: 1"
[1] "Log-like diff: 0.00558365885393997"
[1] "Log-like diff: 0.00168267525018351"
[1] "Log-like diff: 0.000489232319583977"
[1] "Log-like diff: 0.000153918916637963"
[1] "Log-like diff: 3.92342648254385e-05"
$ICL
$profiles
$boxplots
$probapost_barplots
*************************************************
Model: Poisson
Transformation: none
*************************************************
Clusters fit: 2,3,4,5,6,7,8,9,10,11,12
Clusters with errors: ---
Selected number of clusters via ICL: 12
ICL of selected model: -3013888
*************************************************
Number of clusters = 12
ICL = -3013888
*************************************************
Cluster sizes:
Cluster 1 Cluster 2 Cluster 3 Cluster 4 Cluster 5 Cluster 6 Cluster 7
141 21 32 24 118 175 100
Cluster 8 Cluster 9 Cluster 10 Cluster 11 Cluster 12
63 76 7 33 47
Number of observations with MAP > 0.90 (% of total):
833 (99.52%)
Number of observations with MAP > 0.90 per cluster (% of total per cluster):
Cluster 1 Cluster 2 Cluster 3 Cluster 4 Cluster 5 Cluster 6 Cluster 7
139 21 32 24 118 174 100
(98.58%) (100%) (100%) (100%) (100%) (99.43%) (100%)
Cluster 8 Cluster 9 Cluster 10 Cluster 11 Cluster 12
63 76 7 33 46
(100%) (100%) (100%) (100%) (97.87%)
Class: pca dudi
Call: dudi.pca(df = log(data + 0.1), center = TRUE, scale = TRUE, scannf = FALSE,
nf = 4)
Total inertia: 24
Eigenvalues:
Ax1 Ax2 Ax3 Ax4 Ax5
19.1712 3.3535 0.5249 0.3943 0.1205
Projected inertia (%):
Ax1 Ax2 Ax3 Ax4 Ax5
79.880 13.973 2.187 1.643 0.502
Cumulative projected inertia (%):
Ax1 Ax1:2 Ax1:3 Ax1:4 Ax1:5
79.88 93.85 96.04 97.68 98.18
(Only 5 dimensions (out of 24) are shown)
NULL
log.data <- log2(normalized.count[sharedBy3, ] + 1)
Norm.interest.corr <- corr.test(t(log.data), method = "pearson", ci = F)
Norm.interest.corr$p[lower.tri(Norm.interest.corr$p, diag = -TRUE)] = NA
Pval.adj <- as.data.frame(as.table(Norm.interest.corr$p))
Norm.interest.corr$r[lower.tri(Norm.interest.corr$r, diag = TRUE)] = NA
Correlation <- as.data.frame(as.table(Norm.interest.corr$r))
Cor.table <- na.exclude(cbind(Correlation, Pval.adj))[, c(1, 2, 3, 6)]
colnames(Cor.table) <- c("gene1", "gene2", "cor", "p.adj")
Cor.table.filt <- Cor.table[(abs(Cor.table[, 3]) > 0.9 & Cor.table[, 4] < 0.01),
]
g <- graph.data.frame(Cor.table.filt[, 1:2], directed = -FALSE)
V(g)$color <- clusteredGenes[V(g)]
degree <- degree(g)
hist(degree, breaks = 30)Node_nw_st <- data.frame(degree, betweenness)
plot.igraph(g, vertex.size = 5, vertex.label.cex = 0.01, color = clusteredGenes)a$cluster <- clusteredGenes[a$ensembl_gene_id]
entrezID <- list()
nb_clust = max(clusteredGenes)
for (clust in seq(1:nb_clust)) {
# print(entrez[entrez$cluster == clust,]$ensembl_transcript_id)
entrezID[[length(entrezID) + 1]] <- na.omit(a[a$cluster == clust, ]$entrezgene_id)
}
names(entrezID) <- as.character(seq(1:nb_clust))
ck <- compareCluster(geneCluster = entrezID, fun = "enrichGO", OrgDb = org.At.tair.db,
ont = "BP", pAdjustMethod = "BH", pvalueCutoff = 0.01, qvalueCutoff = 0.05)
clusterProfiler::dotplot(ck, x = ~Cluster)****************************************
coseq analysis: Poisson approach & none transformation
K = 2 to 12
Use set.seed() prior to running coseq for reproducible results.
****************************************
Running g = 2 ...
[1] "Initialization: 1"
[1] "Log-like diff: 1.06819442180495e-10"
Running g = 3 ...
[1] "Initialization: 1"
[1] "Log-like diff: 0.0758628365212921"
[1] "Log-like diff: 0.0198623147528725"
[1] "Log-like diff: 0.00525704057190701"
[1] "Log-like diff: 0.00139919104513453"
[1] "Log-like diff: 0.000341532015648127"
Running g = 4 ...
[1] "Initialization: 1"
[1] "Log-like diff: 0.0111170800072813"
[1] "Log-like diff: 0.00141139826298442"
[1] "Log-like diff: 0.000161397428641408"
[1] "Log-like diff: 2.2397745002678e-05"
[1] "Log-like diff: 2.78974421874523e-06"
Running g = 5 ...
[1] "Initialization: 1"
[1] "Log-like diff: 1.06375966701933e-06"
Running g = 6 ...
[1] "Initialization: 1"
[1] "Log-like diff: 61.6606733135965"
[1] "Log-like diff: 220.279988254277"
[1] "Log-like diff: 343.597473475302"
[1] "Log-like diff: 118.657798326178"
[1] "Log-like diff: 89.3377189725218"
Running g = 7 ...
[1] "Initialization: 1"
[1] "Log-like diff: 4569.7505378941"
[1] "Log-like diff: 3260.30658412625"
[1] "Log-like diff: 427.290036720683"
[1] "Log-like diff: 812.707607252818"
[1] "Log-like diff: 292.797388408702"
Running g = 8 ...
[1] "Initialization: 1"
[1] "Log-like diff: 46.0926226393776"
[1] "Log-like diff: 35.3812812980884"
[1] "Log-like diff: 12.9785765602482"
[1] "Log-like diff: 0.756511837942664"
[1] "Log-like diff: 0.37566109213202"
Running g = 9 ...
[1] "Initialization: 1"
[1] "Log-like diff: 47.7096958256379"
[1] "Log-like diff: 66.4585845237957"
[1] "Log-like diff: 22.8816925488037"
[1] "Log-like diff: 314.666696217909"
[1] "Log-like diff: 181.581898960294"
Running g = 10 ...
[1] "Initialization: 1"
[1] "Log-like diff: 0.461052803007771"
[1] "Log-like diff: 0.256458121682254"
[1] "Log-like diff: 0.106759220704497"
[1] "Log-like diff: 0.0339550212833508"
[1] "Log-like diff: 0.011994087019815"
Running g = 11 ...
[1] "Initialization: 1"
[1] "Log-like diff: 28.7888473062168"
[1] "Log-like diff: 1.63534313289344"
[1] "Log-like diff: 0.63011593410125"
[1] "Log-like diff: 0.253746859510482"
[1] "Log-like diff: 0.0982710563896738"
Running g = 12 ...
[1] "Initialization: 1"
[1] "Log-like diff: 80.0953725504379"
[1] "Log-like diff: 5.29929211413723"
[1] "Log-like diff: 42.6136182485174"
[1] "Log-like diff: 14.7319905007355"
[1] "Log-like diff: 25.0172757256348"
$ICL
$profiles
$boxplots
$probapost_barplots
*************************************************
Model: Poisson
Transformation: none
*************************************************
Clusters fit: 2,3,4,5,6,7,8,9,10,11,12
Clusters with errors: ---
Selected number of clusters via ICL: 12
ICL of selected model: -3391000
*************************************************
Number of clusters = 12
ICL = -3391000
*************************************************
Cluster sizes:
Cluster 1 Cluster 2 Cluster 3 Cluster 4 Cluster 5 Cluster 6 Cluster 7
276 73 112 172 41 302 24
Cluster 8 Cluster 9 Cluster 10 Cluster 11 Cluster 12
751 121 104 609 256
Number of observations with MAP > 0.90 (% of total):
2772 (97.57%)
Number of observations with MAP > 0.90 per cluster (% of total per cluster):
Cluster 1 Cluster 2 Cluster 3 Cluster 4 Cluster 5 Cluster 6 Cluster 7
265 73 106 167 40 294 22
(96.01%) (100%) (94.64%) (97.09%) (97.56%) (97.35%) (91.67%)
Cluster 8 Cluster 9 Cluster 10 Cluster 11 Cluster 12
743 116 100 599 247
(98.93%) (95.87%) (96.15%) (98.36%) (96.48%)
Class: pca dudi
Call: dudi.pca(df = log(data + 0.1), center = TRUE, scale = TRUE, scannf = FALSE,
nf = 4)
Total inertia: 24
Eigenvalues:
Ax1 Ax2 Ax3 Ax4 Ax5
22.01676 1.12356 0.26987 0.11457 0.06979
Projected inertia (%):
Ax1 Ax2 Ax3 Ax4 Ax5
91.7365 4.6815 1.1245 0.4774 0.2908
Cumulative projected inertia (%):
Ax1 Ax1:2 Ax1:3 Ax1:4 Ax1:5
91.74 96.42 97.54 98.02 98.31
(Only 5 dimensions (out of 24) are shown)
NULL
log.data <- log2(normalized.count[sharedBy3, ] + 1)
Norm.interest.corr <- corr.test(t(log.data), method = "pearson", ci = F)
Norm.interest.corr$p[lower.tri(Norm.interest.corr$p, diag = -TRUE)] = NA
Pval.adj <- as.data.frame(as.table(Norm.interest.corr$p))
Norm.interest.corr$r[lower.tri(Norm.interest.corr$r, diag = TRUE)] = NA
Correlation <- as.data.frame(as.table(Norm.interest.corr$r))
Cor.table <- na.exclude(cbind(Correlation, Pval.adj))[, c(1, 2, 3, 6)]
colnames(Cor.table) <- c("gene1", "gene2", "cor", "p.adj")
Cor.table.filt <- Cor.table[(abs(Cor.table[, 3]) > 0.9 & Cor.table[, 4] < 0.01),
]
g <- graph.data.frame(Cor.table.filt[, 1:2], directed = -FALSE)
V(g)$color <- clusteredGenes[V(g)]
degree <- degree(g)
hist(degree, breaks = 30)Node_nw_st <- data.frame(degree, betweenness)
plot.igraph(g, vertex.size = 5, vertex.label.cex = 0.01, color = clusteredGenes)a$cluster <- clusteredGenes[a$ensembl_gene_id]
entrezID <- list()
nb_clust = max(clusteredGenes)
for (clust in seq(1:nb_clust)) {
# print(entrez[entrez$cluster == clust,]$ensembl_transcript_id)
entrezID[[length(entrezID) + 1]] <- na.omit(a[a$cluster == clust, ]$entrezgene_id)
}
names(entrezID) <- as.character(seq(1:nb_clust))
ck <- compareCluster(geneCluster = entrezID, fun = "enrichGO", OrgDb = org.At.tair.db,
ont = "BP", pAdjustMethod = "BH", pvalueCutoff = 0.01, qvalueCutoff = 0.05)
clusterProfiler::dotplot(ck, x = ~Cluster)Réseau CO2
La tomate semble répondre différemment dans certaines mesures : plus d’effet du CO2, effet moindre du fer, effet nitrate plutôt similaire.
Ontologies moins fournies pour la tomate
Réseau CO2
DEG en commun entre les 4 comparaisons possibles pour l’effet d’un facteur (Venn diagrams)
Fait sur l’ensemble des transcriptômes (plus large que les transcriptômes sur lesquels les DEG ont été détectés)
Relevance Network fait comme Rodrigo. et Al, seuil sur la valeur de corréation et sur la pvalue, gènes triés sur leur centralité et connectivité
Visualisés dans igraph après Clustering
Visualisés dans Cytoscape, pus clustering de communautés (pluggins) et analyse d’enrichissement d’ontologies
Réseau CO2